function [XN,factors,params]=NormaliseGNAT2(X,ppmH,method,features,value,peakID,rawX)
% Description:
%       Normalize spectral matrix X to total area, largest intensity, a single
%       feature, the integral of a set of features, or by PQN / quantile
%       normalization.
%
% Input:
%       X        : stack of 1D spectra (one spectrum per row)
%       ppmH     : chemical shift vector
%       method   : (case-insensitive)
%                  'total' for Total Area,
%                  'largest' for the largest intensity found anywhere in X,
%                  'PQN' for Probabilistic Quotient (asks for a reference
%                        sample on the command line),
%                  'quantile' for Quantile Normalization,
%                  'intensity' for normalization to single feature,
%                  'integral' for normalization to sum of set of features,
%                  'none' to return X unchanged
%       features : optional.
%                  For 'integral', the two ppm values that span the peak to
%                  normalize to - eg [-.05,0.05]. Their order and the
%                  direction of ppmH do not matter.
%                  For 'quantile', optionally set to 'median' to take the
%                  median of the ranked values instead of the mean.
%                  For 'PQN' it is forwarded to the initial 'total' step.
%       value    : optional scale applied by 'total' and 'integral'
%                  (XN = value * X / factors). Defaults to 1.
%       peakID   : For 'intensity', the column index of the feature in X to
%                  normalize to - e.g. 10 for X(:,10).
%                  For 'quantile' (legacy usage), a quantilenorm option name
%                  that is switched on, e.g. 'Median'.
%       rawX     : optional matrix the 'integral' factors are integrated
%                  from. Defaults to X.
%
% Output:
%       XN      : N x P matrix of normalized data
%       factors : calculated normalization factors.
%                 1 x N for 'total', 'PQN' and 'integral'; N x 1 for
%                 'intensity'; scalar for 'largest'; NaN for 'quantile';
%                 0 for 'none'.
%       params  : struct with fields 'method' and 'reference_sample'
%                 (the latter only filled in for 'PQN'). Only created when
%                 a third output is requested.
%
% Log:
%             'factors' output for PQN now includes the initial 'total' normalization factors
%             'factors' output for quantile returns nan to avoid error
% Example run:
% [XN,factors]=NormaliseGNAT2(X,ppmR,'PQN');
%
%
%   This is a part of the GNAT
%   Copyright  2024  <Mathias Nilsson>%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You should have received a copy of the GNU General Public License along
%   with this program; if not, write to the Free Software Foundation, Inc.,
%   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%
%   Dr. Mathias Nilsson
%   School of Chemistry, University of Manchester,
%   Oxford Road, Manchester M13 9PL, UK
%   Telephone: +44 (0) 161 306 4465
%   Fax: +44 (0)161 275 4598
%
%   Hugo da Silva Rocha, PhD Student
%   School of Chemistry, University of Manchester,
%   hugo.rocha@postgrad.manchester.ac.uk
%   Telephone: +44 (0) 7871080481

% X, ppmH and method are all used unconditionally below.
if nargin<3
    error('NormaliseGNAT2:nargin','incorrect number of input parameters');
end

% Defaults for the optional arguments so that short calls such as
% NormaliseGNAT2(X,ppm,'total') work.
if nargin<4
    features=[];
end
if nargin<5 || isempty(value)
    value=1;
end
if nargin<6
    peakID=[];
end
if nargin<7 || isempty(rawX)
    rawX=X;
end

XN=zeros(size(X));
obs=size(X,1);

% Initialize params structure only when the caller asked for it
if nargout > 2
    params.method = method;
    params.reference_sample = [];
end

switch lower(method)
    case 'total'
        factors=nan(1,obs);
        for i=1:obs
            factors(i)=sum(X(i,:));
            XN(i,:)=value*(X(i,:)./factors(i));
        end
    case 'largest'
        % One global factor: the largest value in the whole matrix.
        factors=max(max(X));
        XN=X/factors;
    case 'pqn'
        % Ask user to select reference sample for PQN
        fprintf('\n=== PQN Reference Sample Selection ===\n');
        fprintf('Please choose a sample for Probability Quotient Normalization (PQN):\n\n');

        reference_sample = select_pqn_reference(X, ppmH);

        if nargout > 2
            params.reference_sample = reference_sample;
        end

        % NOTE(review): this calls NormaliseGNAT (not NormaliseGNAT2), which
        % is defined outside this file - confirm that is the intended
        % total-area routine.
        [X,factorsT] = NormaliseGNAT(X,ppmH,'total',features,value);
        X = X.*100;
        % Replace exact zeros so the quotients below stay finite.
        X(0==X)=0.00000001;

        % Use selected reference sample instead of median
        normRef = repmat(X(reference_sample,:), size(X,1), 1);
        F = X./(normRef);

        factors=nan(1,obs);
        for i=1:obs
            factors(i)=median(F(i,:));
            XN(i,:)=X(i,:)./factors(i);
        end
        % Fold the total-area factors in; force a row so a column-shaped
        % factorsT cannot expand the product into a matrix.
        factors = factors .* factorsT(:).';
    case 'intensity'
        if isempty(peakID)
            error('NormaliseGNAT2:missingPeakID', ...
                'The ''intensity'' method requires peakID (column index into X).');
        end
        factors=X(:,peakID);
        for i=1:obs
            XN(i,:)=X(i,:)./factors(i);
        end
    case 'integral'
        if ~isnumeric(features) || numel(features)<2
            error('NormaliseGNAT2:badFeatures', ...
                'The ''integral'' method requires features = [ppm1 ppm2].');
        end
        % Nearest points on the ppm axis to the two requested limits.
        [~,idxA]=min(abs(ppmH-features(1)));
        [~,idxB]=min(abs(ppmH-features(2)));
        % Sort the indices so the range is non-empty whether ppmH is
        % ascending or descending.
        lo=min(idxA,idxB);
        hi=max(idxA,idxB);
        factors=nan(1,obs);
        for i=1:obs
            factors(i)=trapz(rawX(i,lo:hi));
            XN(i,:)=value*(X(i,:)./factors(i));
        end
    case 'quantile'
        if ischar(peakID) && ~isempty(peakID)
            % Legacy usage: peakID carries the quantilenorm option name.
            XN = transpose(quantilenorm(X',peakID,'true'));
        elseif ischar(features) && strcmpi(features,'median')
            XN = transpose(quantilenorm(X','Median',true));
        else
            XN = transpose(quantilenorm(X'));
        end
        factors = nan;
    case 'none'
        % does nothing
        XN = X;
        factors = 0;
    otherwise
        % Fail before the success dialog instead of leaving 'factors' unset.
        error('NormaliseGNAT2:unknownMethod', ...
            'Unknown normalization method ''%s''.', char(method));
end

uiwait(msgbox('The normalization calculation was carried out successfully','Normalization','help','modal'));

end

function reference_sample = select_pqn_reference(X, ppmH)
% Interactively select the reference sample (row of X) for PQN normalization.
%
% Prints per-sample statistics (row sum, median, standard deviation) to the
% command window, then keeps prompting until a valid row index is entered.
% Pressing Enter without a value selects the sample whose total sum is
% closest to the median total sum.
%
% Input:
%       X    : data matrix, one sample per row
%       ppmH : chemical shift vector (only used to print its range)
%
% Output:
%       reference_sample : integer row index into X, 1..size(X,1)

    [num_samples, num_variables] = size(X);

    % Calculate sample statistics
    sample_sums = sum(X, 2);
    sample_medians = median(X, 2);
    sample_stds = std(X, 0, 2);

    % Sample whose total sum lies closest to the median total sum; used both
    % in the summary and as the default choice. min returns the first index
    % on ties.
    [~, default_sample] = min(abs(sample_sums - median(sample_sums)));

    % Display sample information
    fprintf('Dataset: %d samples, %d variables (ppm points)\n', num_samples, num_variables);
    fprintf('Chemical shift range: %.2f to %.2f ppm\n', min(ppmH), max(ppmH));

    fprintf('\n%-10s %-15s %-15s %-15s\n', 'Sample', 'Total Sum', 'Median', 'Std Dev');
    fprintf('%s\n', repmat('-', 1, 60));

    for i = 1:num_samples
        fprintf('%-10d %-15.4f %-15.4f %-15.4f\n', ...
            i, sample_sums(i), sample_medians(i), sample_stds(i));
    end

    % Show additional statistics
    fprintf('\nSample Statistics Summary:\n');
    fprintf('Average total sum: %.4f\n', mean(sample_sums));
    fprintf('Median total sum: %.4f\n', median(sample_sums));
    fprintf('Most typical sample (closest to median sum): %d\n', default_sample);

    % Get user input
    while true
        try
            % input() evaluates what is typed, so an unknown name or a syntax
            % error raises here and is reported by the catch below.
            reference_sample = input('\nEnter the sample number to use as reference: ');

            if isempty(reference_sample)
                % Use default: sample closest to median sum
                reference_sample = default_sample;
                fprintf('Using sample %d (closest to median) as default reference.\n', reference_sample);
                break;
            elseif isnumeric(reference_sample) && isscalar(reference_sample) ...
                    && isreal(reference_sample) ...
                    && reference_sample == fix(reference_sample) ...
                    && reference_sample >= 1 && reference_sample <= num_samples
                % Only a real integer scalar is usable as a row index; values
                % such as 2.5 are rejected here rather than failing later.
                fprintf('Selected sample %d as PQN reference.\n', reference_sample);
                break;
            else
                fprintf('Invalid sample number. Please enter a number between 1 and %d.\n', num_samples);
            end
        catch
            fprintf('Invalid input. Please enter a valid sample number.\n');
        end
    end

    fprintf('\n');
end

% Alternative version that allows passing reference sample as parameter
function [XN,factors,params]=NormaliseGNAT_with_ref(X,ppmH,method,varargin)
% Extended version that accepts the PQN reference sample as a parameter
% instead of asking for it on the command line.
%
% Input:
%   X, ppmH, method   - as for the main normalization function
%
% Optional name/value pairs:
%   'features'        - forwarded to NormaliseGNAT (default [])
%   'value'           - forwarded to NormaliseGNAT (default 1)
%   'peakID'          - forwarded to NormaliseGNAT (default [])
%   'ReferenceSample' - sample (row) index to use as PQN reference
%
% Output:
%   XN, factors       - normalized data and normalization factors
%   params            - only created when a third output is requested
%
% When method is 'pqn' and 'ReferenceSample' is given, the PQN quotient step
% is carried out here with that sample, using the same steps as the
% interactive PQN branch of this file. In every other case the call is
% forwarded to NormaliseGNAT unchanged.
% NOTE(review): NormaliseGNAT is defined outside this file - confirm its
% signature matches the calls below.

    p = inputParser;
    addRequired(p, 'X', @isnumeric);
    addRequired(p, 'ppmH', @isnumeric);
    addRequired(p, 'method', @ischar);
    addParameter(p, 'features', [], @isnumeric);
    addParameter(p, 'value', 1, @isnumeric);
    addParameter(p, 'peakID', [], @isnumeric);
    addParameter(p, 'ReferenceSample', [], @isnumeric);

    parse(p, X, ppmH, method, varargin{:});
    opts = p.Results;

    % If PQN method and reference sample is provided, use it
    if strcmpi(method, 'pqn') && ~isempty(opts.ReferenceSample)
        reference_sample = opts.ReferenceSample;
        num_samples = size(X, 1);

        % The reference must be a usable row index of X.
        if ~isscalar(reference_sample) || ~isreal(reference_sample) ...
                || reference_sample ~= fix(reference_sample) ...
                || reference_sample < 1 || reference_sample > num_samples
            error('NormaliseGNAT_with_ref:badReference', ...
                'ReferenceSample must be an integer between 1 and %d.', num_samples);
        end

        fprintf('Using pre-selected sample %d as PQN reference.\n', reference_sample);

        % Initial total-area normalization.
        [Xt, factorsT] = NormaliseGNAT(X, ppmH, 'total', opts.features, opts.value);
        Xt = Xt .* 100;
        % Replace exact zeros so the quotients below stay finite.
        Xt(0 == Xt) = 0.00000001;

        % Quotients of every sample against the chosen reference sample.
        normRef = repmat(Xt(reference_sample, :), num_samples, 1);
        F = Xt ./ normRef;

        XN = zeros(size(Xt));
        factors = nan(1, num_samples);
        for i = 1:num_samples
            factors(i) = median(F(i, :));
            XN(i, :) = Xt(i, :) ./ factors(i);
        end
        % Include the total-area factors; force a row orientation.
        factors = factors .* factorsT(:).';

        if nargout > 2
            params.method = method;
            params.reference_sample = reference_sample;
        end
    else
        % Call original function
        if nargout > 2
            [XN, factors, params] = NormaliseGNAT(X, ppmH, method, opts.features, opts.value, opts.peakID);
        else
            [XN, factors] = NormaliseGNAT(X, ppmH, method, opts.features, opts.value, opts.peakID);
        end
    end
end